---
geometry: margin = 0.7in
output: pdf_document
header-includes:
    - \usepackage[labelformat=empty]{caption}
---

```{r setup, warning = FALSE, message = FALSE, echo = F}
library(tidyverse)
library(stargazer)
library(xtable)
library(RColorBrewer)
library(gridExtra)
library(Zelig)
options(xtable.comment = FALSE)
options(xtable.table.placement = "!h")
options(dplyr.summarise.inform = FALSE)
library(srvyr)
library(survey)
library(scales)
library(patchwork)
library(gcookbook)
library(kableExtra)

############################### DATA CLEANING #########################################

# load public-use data file - download and unzip "Feb15 Data Release" from Pew's Website
# set working directory
setwd()
# read in data
public <- foreign::read.spss("Feb15 Multiracial_cleaned.sav", to.data.frame = T, use.value.labels = F)   

# load in dataset containing new variables constructed from public-use data and write-in responses in restricted-use data
new <- read.csv("~/new_variables.csv")

# merge by CaseID
data <- left_join(public, new, by = 'CaseID')


# RECODE DEMOGRAPHIC VARIABLES
# recode gender
# attributes(merged$PPGENDER) #ref = male
data$ppgender <- as.factor(data$PPGENDER) 
data$ppgender <- factor(data$ppgender, levels = c(1,2), labels = c("Male", "Female"))

#recode region
data$region <- as.factor(data$PPREG4) # 1 = Northeast, 2 = Midwest, 3 = South 4 = West
data$region <- factor(data$region, labels = c("Northeast", "Midwest", "South", "West"))
data$region <- relevel(data$region, ref = "South") 

# recode age so that reference age/0 is equal to 18 (the youngest age possible)
data$age <- data$PPAGE - 18

# recode education
data$educ2cat <- ifelse(data$PPEDUCAT == 4, 1, 0)
data$educ2cat <- factor(data$educ2cat, levels = c(0,1), labels = c("No BA", "BA plus"))

#recode income -- take midpoint of each category, divide by 1000, and log
#midpoint for top category derived from a pareto-based formula; Hout 2004  
# 681 observations in second highest category and 1162 obs in highest category
V <- (log(681 + 1162) - log(1162))/(log(175000) - log(150000))
M <- 175000*0.5*(1 + (V/(V - 1)))
data <- data %>% mutate(income = case_when(PPINCIMP == 1 ~ 2500,
                                           PPINCIMP == 2 ~ (7499 - 5000)/2 + 5000,
                                           PPINCIMP == 3 ~ (9999 - 7500)/2 + 7500,
                                           PPINCIMP == 4 ~ (12499 - 10000)/2 + 10000,
                                           PPINCIMP == 5 ~ (14999 - 12500)/2 + 12500,
                                           PPINCIMP == 6 ~ (19999 - 15000)/2 + 15000,
                                           PPINCIMP == 7 ~ (24999 - 20000)/2 + 20000,
                                           PPINCIMP == 8 ~ (29999 - 25000)/2 + 25000,
                                           PPINCIMP == 9 ~ (34999 - 30000)/2 + 30000,
                                           PPINCIMP == 10 ~ (39999 - 35000)/2 + 35000,
                                           PPINCIMP == 11 ~ (49999 - 40000)/2 + 40000,
                                           PPINCIMP == 12 ~ (59999 - 50000)/2 + 50000,
                                           PPINCIMP == 13 ~ (74999 - 60000)/2 + 60000,
                                           PPINCIMP == 14 ~ (84999 - 75000)/2 + 75000,
                                           PPINCIMP == 15 ~ (99999 - 85000)/2 + 85000,
                                           PPINCIMP == 16 ~ (124999 - 100000)/2 + 100000,
                                           PPINCIMP == 17 ~ (149999 - 125000)/2 + 125000,
                                           PPINCIMP == 18 ~ (174999 - 150000)/2 + 150000,
                                           PPINCIMP == 19 ~ M))
# express in thousands
data$income <- data$income/1000
#logged household income
data$logged.income <- log(data$income)

# survey language
data$xspanish <- as.factor(data$XSPANISH) #ref english

# For Section 3 of Appendix:
# Recode nativity into binary: foreign born = 2, US born = 1, with US born as reference group
# (code missing and refused as US-born, flag later)
# Foreign born technically includes Puerto Rico and other U.S. territories, so US born really means "born in the 50 states"
data$nativity <- ifelse(data$BORN > 1, 2, 1)
data$nativity <- ifelse(is.na(data$BORN), 1, data$nativity)
data$nativity <- factor(data$nativity, levels = c(1, 2), labels = c("native", "foreign born"))
# Flag for missing or refused
data$born.missing <- ifelse(data$BORN == -1, 1, 0)
data$born.missing <- ifelse(is.na(data$BORN), 1, data$born.missing)
data$born.missing <- factor(data$born.missing, levels = c(0,1), labels = c("not missing", "missing"))

# Recode marital status -- 1 = married
data$marit2cat <- ifelse(data$PPMARIT == 1, 1, 0)
data$marit2cat <- factor(data$marit2cat, levels = c(0,1), labels = c("Not Married", "Married"))

###############################################################################
# Creating Ancestry Variables
#any_X = 1 if R selected X category for mom, dad, grandparents
#any_X also = 1 if R said R had ggparents or ancestors of diff race than self/mom/dad/gparents,
#and that race was X category

data <- data %>% mutate(# White ancestry
                        any_white = case_when(QR2a == 1 ~ 1,
                                              QR3a == 1 ~ 1,
                                              QR4a == 1 ~ 1,
                                              QR5 == 1 & QR5a == 1 ~ 1,
                                              TRUE ~ 0),
                        # Hispanic/Latino ancestry
                        any_latino = case_when(QR2b == 1 ~ 1,
                                               QR3b == 1 ~ 1,
                                               QR4b == 1 ~ 1,
                                               QR5 == 1 & QR5b == 1 ~ 1,
                                               TRUE ~ 0),
                        # Black ancestry
                        any_black = case_when(QR2c == 1 ~ 1,
                                              QR3c == 1 ~ 1,
                                              QR4c == 1 ~ 1,
                                              QR5 == 1 & QR5c == 1 ~ 1,
                                              TRUE ~ 0),
                        # Asian ancestry
                        any_asian = case_when(QR2d == 1 ~ 1,
                                              QR3d == 1 ~ 1,
                                              QR4d == 1 ~ 1,
                                              QR5 == 1 & QR5d == 1 ~ 1,
                                              TRUE ~ 0),
                        # American Indian ancestry
                        any_indian = case_when(QR2e == 1 ~ 1,
                                               QR3e == 1 ~ 1,
                                               QR4e == 1 ~ 1,
                                               QR5 == 1 & QR5e == 1 ~ 1,
                                               TRUE ~ 0),
                        # Native Hawaiian or Pacific Islander,
                        any_nhopi = case_when(QR2f == 1 ~ 1,
                                              QR3f == 1 ~ 1,
                                              QR4f == 1 ~ 1,
                                              QR5 == 1 & QR5f == 1 ~ 1,
                                              TRUE ~ 0),
                        # Other
                        any_other = case_when(QR2g == 1 ~ 1,
                                              QR3g == 1 ~ 1,
                                              QR4g == 1 ~ 1,
                                              QR5 == 1 & QR5g == 1 ~ 1,
                                              TRUE ~ 0))

################################################################################################
# racial ancestry regimes
data$regime <- case_when(data$any_black == 1 ~ "Black",
                         data$any_black == 0 & data$any_latino == 1 ~ "Hispanic",
                         data$any_black == 0 & data$any_latino == 0 & 
                           data$any_asian == 1 ~ "Asian",
                         data$any_black == 0 & data$any_latino == 0 &
                           data$any_asian == 0 & data$any_indian == 1 ~ "Indigenous",
                         data$any_black == 0 & data$any_latino == 0 &
                           data$any_asian == 0 & data$any_nhopi == 1 ~ "Indigenous",
                         data$any_black == 0 & data$any_latino == 0 &
                           data$any_asian == 0 & data$any_indian == 0 & data$any_nhopi == 0 ~ "Other")

## convert to factor and make "Other" the reference category
data$regime <- factor(data$regime) %>%
                  fct_relevel(., "Other", "Black", "Hispanic", "Asian", "Indigenous")

########################################################################################
# Outcome variables

################################## 
# Reports Multiracial Ancestry 

# if R did not report MOM, DAD, GPARENT, OR ANCESTOR ancestry, we can't tell presence and timing  
# of MR ancestry. In that case, R is flagged as missing (n = 95) 

data <- data %>% mutate(ancestry_incomplete = case_when(QR2a == 9 ~ 1,
                                                        QR2b == 9 ~ 1,
                                                        QR2c == 9 ~ 1,
                                                        QR2d == 9 ~ 1, 
                                                        QR2e == 9 ~ 1,
                                                        QR2f == 9 ~ 1,
                                                        QR2g == 9 ~ 1,
                                                        QR3a == 9 ~ 1, 
                                                        QR3b == 9 ~ 1,
                                                        QR3c == 9 ~ 1,
                                                        QR3d == 9 ~ 1, 
                                                        QR3e == 9 ~ 1,
                                                        QR3f == 9 ~ 1, 
                                                        QR3g == 9 ~ 1,
                                                        QR4a == 9 ~ 1, 
                                                        QR4b == 9 ~ 1,
                                                        QR4c == 9 ~ 1, 
                                                        QR4d == 9 ~ 1, 
                                                        QR4e == 9 ~ 1,
                                                        QR4f == 9 ~ 1,
                                                        QR4g == 9 ~ 1,
                                                        # if QR5a is missing, QR5b-g is also missing
                                                        QR5 == 1 & QR5a == 9 ~ 1,
                                                        TRUE ~ 0))

#### people are MR Aware if they report two or more racial categories in their ancestry
data <- data %>% 
  mutate(total_ancestry_races = any_white + any_black + any_latino + any_asian + 
                              any_indian +  any_nhopi + any_other,
         reportsMR = ifelse(total_ancestry_races >= 2, 1, 0)) 

# PLUS those who said "yes" to other ancestry despite not specifying which category (QR5 == 1)
# (aka they reported 1 racial category across all ancestors but said earlier ancestors were another race)
data$reportsMR <- ifelse(data$QR5 == 1, 1, data$reportsMR)

# remove ancestry_incomplete cases from reportsMR coding
data$reportsMR <- ifelse(data$ancestry_incomplete == 1, NA, data$reportsMR)

##########################################################
### Multiracial Self-ID (Selects 2 or more races for self)

# recode Refused to Missing
# (if QR1a == 9 for a respondent, then QR1b-g is also 9; R refused to answer QR1)
data[data$QR1a == 9, "QR1a"] <- NA
data[data$QR1b == 9, "QR1b"] <- NA
data[data$QR1c == 9, "QR1c"] <- NA
data[data$QR1d == 9, "QR1d"] <- NA
data[data$QR1e == 9, "QR1e"] <- NA
data[data$QR1f == 9, "QR1f"] <- NA
data[data$QR1g == 9, "QR1g"] <- NA

# sum up the number of races reported
data <- data %>% mutate(num_races = QR1a + QR1b + QR1c + QR1d + QR1e + QR1f + QR1g,
# MR Self- ID: 1 if respondent reports 2 or more racial categories for self, 0 if not
                        MRid = ifelse(num_races >= 2, 1, 0)) 

### recoding write-in responses
# R's who self-identify as "Some Other Race" may fall into the following scenarios:
# 1) If they self-id as SOR along with TWO OR MORE other categories (QR1g == 1 & num_races >= 3) they are coded as MR-identifying

# 2) If they self-id as SOR along with ONE other category (QR1g == 1 & num_races == 2), then there are two possibilities:
#    A) The write-in response was irrelevant or redundant with the other category they selected 
#       (e.g. write-in was "German" when they checked "White" and "Some Other Race"), so the "race_R" variable was recoded 
#       to a single race (one character long string).
#    B) The write-in response indicates that the respondent was expressing multiracial identity. In 
#       these cases, the "race_R" variable would be recoded to indicate 2+ races/origins and would be 2+ characters long.

# 3) If they self-id as SOR ONLY (QR1g == 1 & num_races == 1), then there are two possibilities:
#    A) The write-in response did NOT indicate multiracial self-identification. In these cases, 
#       the "race_R" variable would remain blank.
#    B) The write-in response DID indicate multiracial self-identification. In these cases, the "race_R"
#       variable would be recoded to a 2+ character string indexing the categories their write-in response mentioned. 


# Cases that belong to 1) do not need to be recoded because they are counted as multiracially self-identifying anyway.
# Cases that belong to 2A) NEED TO BE CODED BACK TO MONORACIAL because would be counted as multiracial self-id in our coding scheme, even though manual review indicates that they are not expressing multiracial identity.
# Cases that belong to 2B) do not need to be recoded because they are counted as multiracial self-id in our coding scheme and manual review agrees.
# Cases that belong to 3A) do not need to be recoded because they are counted as monoracial self-id in our coding scheme and manual review agrees.
# Cases that belong to 3B) NEED TO BE CODED TO MULTIRACIAL because they are counted as monoracial in our codings scheme, but manual review indicates that they are expressing multiracial identity.

# Cases that should be recoded back to monoracial (2A)
monoracial_cases <- unname(unlist(data %>% filter(num_races == 2 & QR1g == 1) %>% 
  mutate(races = nchar(race_R)) %>%
  filter(races == 1) %>%
  select(CaseID))) 
data$MRid <- ifelse(data$CaseID %in% monoracial_cases, 0, data$MRid)

# Cases that should be recoded to multiracial (3B)
multiracial_other_cases <- unname(unlist(data %>% filter(num_races == 1 & QR1g == 1) %>% 
  mutate(races = nchar(race_R)) %>%
  filter(races >= 1) %>%
  select(CaseID)))    
data$MRid <- ifelse(data$CaseID %in% multiracial_other_cases, 1, data$MRid)


# relabel levels
data$MRid <- factor(data$MRid, levels = c(0,1), labels = c("Selects 1 race", "Selects 2+ races"))

########### Combine MR awareness and identification to one variable (useful for some descriptives)
data <- data %>% mutate(AwareID = as.factor(case_when(reportsMR == 0 ~ "Monoracial Ancestry",
                                                      reportsMR == 1 & MRid == "Selects 1 race" ~ 
                                                        "Multiracial Ancestry;\nMonoracial Self-identification",
                                                      TRUE ~ "Multiracial Ancestry and\nSelf-identification")))
data$AwareID = factor(data$AwareID, 
                      levels = c("Monoracial Ancestry",
                                 "Multiracial Ancestry;\nMonoracial Self-identification",
                                 "Multiracial Ancestry and\nSelf-identification"))

# collapse AwareID into two categories -- Multiracial and Monoracial Self-ID
data$AwareID_2cat = as.factor(ifelse(data$reportsMR == 1 & data$MRid == "Selects 2+ races",  
                                 "Multiracial Self-identification", 
                                 "Monoracial Self-identification"))

#########################################################################################################
# recode generation (following previous RA's approach but counting Some Other Race as category)

# create dummy for having Some Other Race in ancestry and 
# recode race_mom, race_dad, race_grand, and race_great variables so that "O" (for "Other") is added 

# note: if the respondent didn't select QR#a-f before (e.g. only SOR was selected),
# race_mom/dad/grand/great will be blank
data <- data %>% mutate(QR2gz = ifelse(QR2g == 1, "O", NA),
               QR3gz = ifelse(QR3g == 1, "O", NA),
               QR4gz = ifelse(QR4g == 1, "O", NA),
               QR5gz = ifelse(QR5g == 1, "O", NA)) %>%
  mutate(new_race_mom = ifelse(is.na(QR2gz), race_mom, paste(race_mom, QR2gz, sep = "")),
         new_race_dad = ifelse(is.na(QR3gz), race_dad ,paste(race_dad, QR3gz, sep = "")),
         new_race_grand = ifelse(is.na(QR4gz), race_grand, paste(race_grand, QR4gz, sep = "")),
         new_race_great = ifelse(is.na(QR5gz), race_great, paste(race_great, QR5gz, sep = ""))) 

# generation dummy variables
data <- data %>%
########## identify monoracial (0 generations)
# if one racial category is reported in ancestry AND does NOT answer "yes" to QR5 
# (same results as matching string of race_mom, race_dad, etc.)
  mutate(gen0 = ifelse(total_ancestry_races == 1 & QR5 != 1, 1, 0),
########## identify 1st gen
# if both parents are single race AND those races are different AND QR5 != 1
         gen1 = ifelse(new_race_mom != new_race_dad &
                         nchar(new_race_mom) == 1 &
                         nchar(new_race_dad) == 1 &
                         QR5 != 1, 1, 0))
# but these include cases where grandparents have a value that aren't in parents 
grand_notinpar_cases <- data %>%
  filter(gen1 == 1) %>%
  # put space between characters in grandparent race string
  mutate(race_grand_space = trimws(gsub('(.{1})', '\\1 ',new_race_grand))) %>%
  # separate rows and match, flag == 1 if there is a race in grandparents that's not in mom or dad
  separate_rows(race_grand_space, sep = " ") %>%
  mutate(grand_new_flag = ifelse(race_grand_space == new_race_mom | race_grand_space == new_race_dad,
                              0, 1)) %>%
  # save CaseIDs of cases with grandparent new races
  group_by(CaseID) %>%
  summarise(grand_new_races = sum(grand_new_flag)) %>%
  filter(grand_new_races >= 1) %>%
  select(CaseID)
# recode these cases as NOT first gen
data$gen1 <- ifelse(data$CaseID %in% unlist(grand_notinpar_cases), 0, data$gen1)
  
# create generation variable - everyone who is not gen 0 or gen 1 is 2
data <- data %>% mutate(gens2cat = case_when(gen0 == 1 ~ 0,
                                             gen1 == 1 ~ 1,
                                             TRUE ~ 2))
# unless you have incomplete ancestry info, then you are missing
data$gens2cat <- ifelse(data$ancestry_incomplete, NA, data$gens2cat)
```

```{r, echo = F}
##### CREATE ANALYTIC SAMPLE

########### Drop cases with incomplete ancestry OR incomplete self-ID
data <- filter(data, ancestry_incomplete == 0 & !is.na(MRid))

```

```{r, echo = F}
##### WEIGHTS 

######### Creating Composite Weights #########
# Three sets of survey weights:
# weight1: Stage 2 sample (national) (xmixed 8-11) n = 21,133
# weight2: Stage 2 sample (for qflag = 1 sample only) n = 2,096
# weight3: Stage 1 sample national (xmixed 1-7) # n = 1,483 
# every observation has a value for either weight1 OR weight3
# we don't care about weight2 because those are to generalize to the multiracial population only (or what Pew defines as such)
# need to weight the  weight1 and weight3 weights and note stage 1 vs stage 2

data <- data %>% mutate(weight1_ratio = 21133/22616,
                        weight3_ratio = 1483/22616,
                        composite_weight = ifelse(!is.na(weight1), weight1*weight1_ratio, 
                                           ifelse(!is.na(weight3), weight3*weight3_ratio, NA)),
                        stage = ifelse(xmixed <= 7, 1, 2)) 
# these weights are relative and do not add up to sample size.
# divide by sum of weights so that weights add up to 1 and multiply by sample size
data$absolute_weight <- (data$composite_weight*nrow(data))/(sum(data$composite_weight))

# confirm that every row has a composite and absolute weight
#data %>% filter(is.na(composite_weight)) %>% nrow()
#data %>% filter(is.na(absolute_weight)) %>% nrow()

# confirm that absolute weight adds up to sample size (N = 22,616)
#sum(data$absolute_weight)

#### create svydesign object for overall sample
weighted_data <- data %>% as_survey_design(ids = 1 , weights = absolute_weight)

##################################################################
########## create MR reporting only dataset and subsequent weights
MR <- filter(data, reportsMR == 1)
# turn generation variable into factor
MR$gens2cat <- as.factor(MR$gens2cat)

# now, we will normalize within this dataset so that these new weights now add up to the MR sample size
MR$absolute_weight <-(MR$composite_weight*nrow(MR))/(sum(MR$composite_weight))

#sum(MR$absolute_weight)

#### create svydesign object for MR only sample
weighted_MR <- MR %>% as_survey_design(ids = 1 , weights = absolute_weight)
```


# Online Appendix

We provide details of five additional robustness checks referenced in the article: 

1. Frequencies of multiracial ancestry awareness and self-identification based on non-mutually-exclusive racial ancestry groupings rather than the nested racial regime classifications presented in the main text.

2. Frequencies of multiracial awareness and self-identification, by regime, gender and generation, based on weighted data.

3. Regression models of multiracial ancestry awareness and self-identification that include nativity and marital status as controls.

4. Frequencies of multiracial ancestry awareness and self-identification based on an alternate coding of "Hispanic" that only includes respondents who state that being Hispanic or Latino is part of their *racial* background (as opposed to ethnic), and subsequent frequencies by gender and generation.

5. Frequencies of multiracial ancestry awareness and self-identification by gender and generation based on an alternate coding of generation that distinguishes 4th+ gen (same race for all parents and grandparents, different race earlier ancestors) from respondents who have mixed-race parents or grandparents.

\vspace{24pt}

## Section 1. Multiracial ancestry awareness and self-identification by racial ancestry

```{r, echo = F, message = F, warning = F, results = 'asis'}
# Table 3: ID and Awareness based on non-mutually exclusive *dummy-coded* racial ancestry categories
# e.g. if you report White ancestry for parents, grandparents, or earlier ancestors, you are in the "White" regime
blackMR <- filter(data, any_black == 1 & reportsMR == 1)
latinoMR <- filter(data, any_latino == 1 & reportsMR == 1)
asianMR <- filter(data, any_asian == 1 & reportsMR == 1)
indigMR <- filter(data, any_indian == 1 | any_nhopi == 1) %>% filter(reportsMR == 1)
otherMR <- filter(data, any_other == 1 & reportsMR == 1)
whiteMR <- filter(data, any_white == 1 & reportsMR == 1)

# create function that loops by race for N and awareness 
raceID_function <- function(race_var, race_data){
  # total N
  N <- table(data[  ,race_var])[2]
  # aware N
  aware_N <- nrow(race_data)
  # reports MR ancestry
  aware_perc <- aware_N*100/N
  aware_perc <- paste(as.character(format(round(aware_perc, 1), nsmall = 1)), "%", sep = "") 
  #put together
  all <- c(N, aware_perc, aware_N)
  return(all)
}

# concatenate above columns with self-ID as 2+ races, regime race, or another race
# for each racial ancestry category
black <- c(raceID_function("any_black", blackMR),
            unlist(blackMR %>% 
              mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                        MRid == "Selects 1 race" & QR1c == 1 ~ "Regime Race", 
                                        TRUE ~ "Other")) %>%
              group_by(MonoID) %>%
              dplyr::summarise(Freq = n()) %>%
              mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
              select(-c(Freq)) %>%
              spread(MonoID, Percentage))
            )

hispanic <- c(raceID_function("any_latino", latinoMR),
            unlist(latinoMR %>% 
              mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                        MRid == "Selects 1 race" & QR1b == 1 ~ "Regime Race", 
                                        TRUE ~ "Other")) %>%
              group_by(MonoID) %>%
              dplyr::summarise(Freq = n()) %>%
              mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
              select(-c(Freq)) %>%
              spread(MonoID, Percentage))
            )

asian <- c(raceID_function("any_asian", asianMR),
            unlist(asianMR %>% 
              mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                        MRid == "Selects 1 race" & QR1d == 1 ~ "Regime Race", 
                                        TRUE ~ "Other")) %>%
              group_by(MonoID) %>%
              dplyr::summarise(Freq = n()) %>%
              mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
              select(-c(Freq)) %>%
              spread(MonoID, Percentage))
            )

# indigenous -- this one is a little different for awareness/MRid
indig_n <- nrow(data %>% filter(any_indian == 1 | any_nhopi == 1))
indig_aware <- data %>% filter(any_indian == 1 | any_nhopi == 1) %>%
  group_by(reportsMR) %>%
  dplyr::summarise(Freq = n()) %>%
  mutate(Percentage = paste(as.character(format(
                        round(100 * Freq/(indig_n), 1),
                        nsmall = 1)),"%", sep = "")) %>%
  filter(reportsMR == 1) %>%
  select(Percentage)
# concatenate 
indig <- c(indig_n, unlist(indig_aware), nrow(indigMR),
           unlist(indigMR %>% 
            mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                      MRid == "Selects 1 race" & QR1e == 1 ~ "Regime Race", 
                                      MRid == "Selects 1 race" & QR1f == 1 ~ "Regime Race",
                                      TRUE ~ "Other")) %>% 
            group_by(MonoID) %>%
            summarise(Freq = n()) %>%
            mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
            select(-c(Freq)) %>%
            spread(MonoID, Percentage))
)

# other
other <- c(raceID_function("any_other", otherMR),
            unlist(otherMR %>% 
              mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                        MRid == "Selects 1 race" & QR1g == 1 ~ "Regime Race", 
                                        TRUE ~ "Other")) %>% 
              group_by(MonoID) %>%
              summarise(Freq = n()) %>%
              mutate(Percentage = paste(as.character(format
                                                     (round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
              select(-c(Freq)) %>%
              spread(MonoID, Percentage))
            )

white <- c(raceID_function("any_white", whiteMR),
            unlist(whiteMR %>% 
              mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "MR",
                                        MRid == "Selects 1 race" & QR1a == 1 ~ "White", 
                                        TRUE ~ "Other")) %>%
              group_by(MonoID) %>%
              dplyr::summarise(Freq = n()) %>%
              mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
              select(-c(Freq)) %>%
              spread(MonoID, Percentage))
            )

#merge all together
race_dummy_outcomes <- data.frame(rbind(black, hispanic, asian,indig,white,other)) %>%
                        mutate(Race = c("Black",
                                        "Hispanic",
                                        "Asian",
                                        "Indigenous",
                                        "White",
                                        "Some Other Race")) %>%
                        select(Race, X1, V2, V3, MR, Regime.Race, Other)
colnames(race_dummy_outcomes) <- c("Racial Ancestry", 
                                   "N", 
                                   "Aware of MR Ancestry",
                                   "N",
                                   "2+ Races",
                                   "Regime Race",
                                   "Another Race")
rownames(race_dummy_outcomes) <- NULL

#print
kable(race_dummy_outcomes, "latex", booktabs = T,  
      caption = "Table A1: Awareness and Self-Identification by Racial Ancestry (Non Mutually-Exclusive)") %>%
  kable_styling(position = "center", 
                latex_options = c("striped", "HOLD_position")) %>% 
  add_header_above(c(" " = 1, 
                     "In Full Sample" = 2, 
                     "Self-Identification among those with MR Ancestry" = 4),
                   bold = T) %>%
  column_spec(1, width = "3 cm") %>%
  column_spec(2, width = "1.2 cm") %>%
  column_spec(4, width = "1.2 cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:",
           footnote_as_chunk = T) 
```

*Note*: Each row in the above table represents a group of respondents with a certain racial ancestry, alone or in combination with other races. Unlike our racial regimes coding scheme, this method of tabulation does not put respondents into mutually-exclusive categories, so respondents who report multiple races in their ancestry are counted in more than one row. "Regime Race" refers to the racial ancestry that everyone counted in a given row shares in common, whether alone or in combination. For example, for respondents who reported any Asian ancestry, the "Regime Race" column shows the percentage of multiracially-aware individuals who self-identify as "Asian" alone.  
  

\newpage

## Section 2. Weighted results

```{r, echo = F, message = F, warning = F, results = 'asis'}
# Table 3: ID and Awareness by Regime
# N 
regime_N <- weighted_data %>% 
  group_by(regime) %>% 
  dplyr::summarise(N = srvyr::survey_total()) %>%
  mutate(N = round(N, 0)) %>%
  select(-N_se)

# awareness prevalence
aware_regime_table <- weighted_data %>% 
  group_by(reportsMR, regime) %>%
  summarise(Freq = survey_total()) %>%
  group_by(regime) %>%
  mutate(TotalAware = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), "%", sep = "")) %>%
  filter(reportsMR == 1) %>% 
  select(-c(reportsMR, Freq, Freq_se))

# aware N
aware_N <- weighted_data %>% 
  group_by(reportsMR, regime) %>%
  summarise(Freq = survey_total()) %>%
  filter(reportsMR == 1) %>% 
  select(-c(reportsMR, Freq_se)) %>%
  mutate(Freq = round(Freq, 0))

#join
regime_outcomes <- left_join(regime_N, aware_regime_table, by = "regime") %>%
                   left_join(., aware_N)
# remove "Other" row
regime_outcomes <- regime_outcomes[2:5, c(1,2,3,5)]

# regime-specific self-ID: as MR, as regime race, as another race
# black
mono_black <- unlist(
   weighted_data %>%
    filter(regime == "Black" & reportsMR == 1) %>%
    mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "2+ Races",
                              MRid == "Selects 1 race" &
                                  QR1c == 1 ~ "Regime Race", 
                              TRUE ~ "Other")) %>%
    group_by(MonoID) %>%
    dplyr::summarise(Freq = survey_total()) %>%
    mutate(Percentage = paste(as.character(format(round(100 *Freq/(sum(Freq)), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq, Freq_se)) %>%
    spread(MonoID, Percentage))

# hispanic
mono_latino <- unlist(
  weighted_data %>%
    filter(regime == "Hispanic" & reportsMR == 1) %>%
    mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "2+ Races",
                              MRid == "Selects 1 race" &
                                QR1b == 1 ~ "Regime Race", 
                              TRUE ~ "Other")) %>%
    group_by(MonoID) %>%
    dplyr::summarise(Freq = survey_total()) %>%
    mutate(Percentage = paste(as.character(format(round(100 * Freq/(sum(Freq)), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq, Freq_se)) %>%
    spread(MonoID, Percentage))

# asian
mono_asian <- unlist(
  weighted_data %>%
    filter(regime == "Asian" & reportsMR == 1) %>% 
    mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "2+ Races",
                              MRid == "Selects 1 race" & 
                                QR1d == 1 ~ "Regime Race", 
                              TRUE ~ "Other")) %>%
    group_by(MonoID) %>%
    summarise(Freq = survey_total()) %>%
    mutate(Percentage = paste(as.character(format(round(100 * Freq/(sum(Freq)), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq, Freq_se)) %>%
    spread(MonoID, Percentage)) 

# indigenous
mono_indig <- unlist(
  weighted_data %>%
    filter(regime == "Indigenous" & reportsMR == 1) %>%
    mutate(MonoID = case_when(MRid == "Selects 2+ races" ~ "2+ Races",
                            MRid == "Selects 1 race" & 
                              QR1e == 1 ~ "Regime Race",
                            MRid == "Selects 1 race" & 
                              QR1f == 1 ~ "Regime Race",
                              TRUE ~ "Other")) %>% 
    group_by(MonoID) %>%
    summarise(Freq = survey_total()) %>%
    mutate(Percentage = paste(as.character(format(round(100 * Freq/(sum(Freq)), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq, Freq_se)) %>%
    spread(MonoID, Percentage))

#merge all together
mono_ID <- data.frame(rbind(mono_black,
                            mono_latino,
                            mono_asian,
                            mono_indig))
regime_outcomes$MR <- mono_ID[ ,"X2..Races"]
regime_outcomes$RegimeRace <- mono_ID$Regime.Race
regime_outcomes$Other <- mono_ID$Other
colnames(regime_outcomes) <- c("Regime",
                               "N",
                               "Aware of MR Ancestry",
                               "N",
                               "2+ Races",
                               "Regime Race",
                               "Another Race")

#print
kable(regime_outcomes, "latex", booktabs = T, 
      caption = "Table A2: Awareness and Self-Identification by Racial Regime (Weighted)") %>%
  kable_styling(position = "center", 
                latex_options = c("striped", "HOLD_position")) %>% 
  add_header_above(c(" " = 1, 
                     "In Full Sample" = 2, 
                     "Self-Identification among those with MR Ancestry" = 4),
                   bold = T) %>%
  column_spec(1, width = "3 cm") %>%
  column_spec(2, width = "1.2 cm") %>%
  column_spec(4, width = "1.2 cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:",
           footnote_as_chunk = T) 
```


\newpage
  
          
```{r, echo = F, message = F, warning = F, fig.show ='hold',fig.align='center', fig.height = 7}
# Figure 1: Reporting MR ancestry and Self-ID by gender

# table - conventional view
just_id_table <- weighted_data %>% 
  group_by(AwareID_2cat, ppgender) %>%
  summarise(Freq = survey_total()) %>%
  select(-Freq_se) %>%
  rename(Gender = ppgender) %>%
  group_by(Gender) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)),
                              "%", sep = "")) 
just_id_table$Gender <- relevel(just_id_table$Gender, ref = "Female")

#set colors
colors_0 <- c(brewer.pal(3, "Blues")[3], brewer.pal(3, "Blues")[1])

#plot
fig1a <- ggplot(just_id_table, aes(x = Gender, y = Freq, 
                                  fill = forcats::fct_rev(AwareID_2cat))) + 
  geom_bar(stat = "identity", position = "stack") +
  labs(y = "Count", x = "",
       fill = "") +
  scale_fill_manual(values = colors_0) +
  theme_minimal() +
  geom_text(aes(x = Gender, y = Freq, label = PercentLabel, 
                group = forcats::fct_rev(AwareID_2cat)),
            position = position_stack(vjust = .5), size = 3) +
  theme(axis.text.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 10, face = "bold"),
        legend.text = element_text(size = 8),
        legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2)) +
  scale_y_continuous(label = comma, limits = c(0, 12700), breaks = c(2500, 5000, 7500, 10000, 12500)) +
  theme(legend.key.height=unit(1, "cm")) +
  theme(panel.grid.major.x = element_blank())

############################################################################################
# table - with ancestry/self-id distinction
aware_id_table <- weighted_data %>% 
  group_by(AwareID, ppgender) %>%
  summarise(Freq = survey_total()) %>%
  select(-Freq_se) %>%
  rename(Gender = ppgender) %>%
  group_by(Gender) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)),
                              "%", sep = "")) 
aware_id_table$Gender <- relevel(aware_id_table$Gender, ref = "Female")

#set colors
colors1 <- c(brewer.pal(3, "Blues")[3], brewer.pal(3, "Blues")[2], brewer.pal(3, "Blues")[1])

#labels of statistical significance
#weights::wtd.chi.sq(var1 = weighted_data[["variables"]]$ppgender, 
#           var2 = weighted_data[["variables"]]$AwareID, 
#           weight = weighted_data[["variables"]]$absolute_weight) # p = 1.445853e-08 ***
#weights::wtd.chi.sq(var1 = weighted_data[["variables"]]$ppgender, 
#                    var2 = weighted_data[["variables"]]$AwareID_2cat,
#                    weight = weighted_data[["variables"]]$absolute_weight) # p = 0.3206762
labels.df <- data.frame(Gender = "Male" ,
                        Freq = 11500,
                        AwareID = "Monoracial Ancestry",
                        lab = "***")

#plot
fig1b <- ggplot(aware_id_table, aes(x = Gender, y = Freq, 
                                   fill = forcats::fct_rev(AwareID))) + 
  geom_bar(stat = "identity", position = "stack",
           aes(color = forcats::fct_rev(AwareID))) +
  labs(y = "Count", x = "",
       fill = "") +
  scale_fill_manual(values = colors1) +
  scale_color_manual(values = c('black', 'black', NA), guide = F) +
  theme_minimal() +
  geom_text(aes(x = Gender, y = Freq, label = PercentLabel, 
                group = fct_rev(AwareID)),
            position = position_stack(vjust = 0.5), size = 3) +
  geom_text(data = labels.df, label = "***", hjust = 3.2, vjust = -1.1, size = 5) +
  theme(axis.text.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 10, face = "bold"),
        legend.text = element_text(size = 8),
        legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 3)) +
  scale_y_continuous(label = comma, limits = c(0, 12700), breaks = c(2500, 5000, 7500, 10000, 12500)) +
  theme(legend.key.height=unit(1, "cm")) +
  theme(panel.grid.major.x = element_blank())

#combine
cowplot::plot_grid(fig1a, fig1b, ncol = 2, align = "h")
```

\begin{center}
Figure A1: (Weighted)Conventional accounting based on the presence or absence of multiracial identification  (\textit{left panel}) versus also accounting for Americans who are aware of having multiracial ancestry but self-identify with only one race (\textit{right panel}).  
  
$^{*}$p<0.05; $^{**}$p<0.01; $^{***}$p<0.001 (Pearson's Chi-Squared Test for statistically significant differences by gender) 
\end{center}

&nbsp;


```{r, echo = F, message = F, warning = F, fig.show ='hold',fig.align='center', fig.height = 7, fig.width=7}
# Figure 2: % Self-id as proportion of aware (total and by generation)

# table - all generations
id_table <- weighted_MR %>% 
  group_by(MRid, ppgender) %>%
  summarise(Freq = survey_total()) %>%
  select(-Freq_se) %>%
  rename(Gender = ppgender) %>%
  group_by(Gender) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)),
                              "%", sep = ""),
         Generation = c("All Generations")) %>%
  select(Gender, Generation, MRid, Freq, PercentLabel)
id_table$Gender <- relevel(id_table$Gender, ref = "Female")

# table - by generation
id_gen_table <- weighted_MR %>%
  mutate(gens2cat = factor(gens2cat, labels = c("First Generation", "Second Generation or Higher"))) %>%
  group_by(ppgender, gens2cat, MRid) %>%
  summarise(Freq = survey_total()) %>%
  select(-Freq_se) %>%
  rename(Gender = ppgender, Generation = gens2cat) %>%
  group_by(Gender, Generation) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), "%", sep = ""))
id_gen_table$Gender <- relevel(id_gen_table$Gender, ref = "Female")

# combine tables
id_all_table <- bind_rows(id_table, id_gen_table)
id_all_table$Generation <- factor(id_all_table$Generation,
                                     levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

#set colors
colors2 <- c(brewer.pal(3, "Blues")[3], brewer.pal(3, "Blues")[2])

################################################ Statistical significance and labels
gen1 <- filter(weighted_MR, gens2cat == 1)
gen2 <- filter(weighted_MR, gens2cat == 2)
#weights::wtd.chi.sq(var1 = weighted_MR$variables$ppgender, 
#                    var2 = weighted_MR$variables$MRid,
#                    weight = weighted_MR$variables$absolute_weight) #p = 0.002715 **
#weights::wtd.chi.sq(var1 = gen1$variables$ppgender,
#                    var2 = gen1$variables$MRid,
#                    weight = gen1$variables$absolute_weight) #p = 0.1136
#weights::wtd.chi.sq(var1 = gen2$variables$ppgender,
#                    var2 = gen2$variables$MRid,
#                    weight = gen2$variables$absolute_weight) #p = 4.065433e-05 ***

#labels 
label1.df <- data.frame(Gender = "Male" ,
                        Freq = 2500,
                        MRid = "Selects 1 race",
                        Generation = "All Generations",
                        lab = "**")
label3.df = data.frame(Gender = "Male",
                       Freq = 2200,
                       MRid = "Selects 1 race",
                       Generation = "Second Generation or Higher",
                       label = "***")

label1.df$Generation <- factor(label1.df$Generation,
                               levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

label3.df$Generation <- factor(label3.df$Generation,
                               levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

########################################### PLOT ##################################
ggplot(id_all_table, aes(x = Gender, y = Freq, fill = fct_rev(MRid))) + 
  geom_bar(stat = "identity", position = "stack") +
  labs(y = "Count", x = "\nGender and Multiracial Generation",
       fill = "") +
  scale_fill_manual(values = colors2) +
  scale_y_continuous(label = comma, 
                     limits = c(0,2600),
                     breaks = c(500, 1000, 1500, 2000, 2500)) +
  theme_minimal() +
  geom_text(aes(x = Gender, y = Freq, label = PercentLabel, 
                group = fct_rev(MRid)),
        position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(.~Generation, strip.position = "bottom") +
  theme(strip.placement = "outside",
        strip.text.x = element_text(size=8, face = "bold")) +
  geom_text(data = label1.df, label = "**", hjust = 6.6, size = 5) +
  geom_text(data = label3.df, label = "***", hjust = 2.5, size = 5) +
  theme(axis.text.x = element_text(size = 8, face = "bold", margin = margin(t = -10)),
        axis.text.y = element_text(size = 8),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.title.x = element_text(size = 10, face = "bold"),
        legend.position = c(0.99, 1.02),
        legend.justification = c("right", "top"),
        legend.box.just = c("right", "top"),
        legend.margin = margin(0, 0, 0, 0),
        legend.text=element_text(size=8),
        legend.key.height = unit(0.5, "cm")) +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.x = unit(-0.1, "lines"))
```

\begin{center}
Figure A2: Multiracial self-identification by gender and generation among those who report multiracial ancestry (Weighted)  \\
  
$^{*}$p<0.05; $^{**}$p<0.01; $^{***}$p<0.001 (Pearson's Chi-Squared Test for statistically significant differences by gender) 
\end{center}

&nbsp;

\newpage

## Section 3. Models controlling for marital status and nativity

```{r, echo = F, message = F, results = 'asis'}
# Table 4: regression predicting awareness and predicting id - no interactions
# first without regime and then with
aware_fit <- glm(reportsMR ~ ppgender + region + age + logged.income + educ2cat + xspanish +
                   marit2cat + nativity + born.missing, 
                 data = data,
                 family = binomial)

aware_fit_regime <- glm(reportsMR ~ ppgender + regime + region + age + logged.income + 
                          educ2cat + xspanish +
                          marit2cat + nativity + born.missing, 
                 data = data,
                 family = binomial)

id_fit <- glm(MRid ~ ppgender + region + age + logged.income + educ2cat + xspanish +
                marit2cat + nativity + born.missing,
              data = MR,
              family = binomial)

id_fit_regime <- glm(MRid ~ ppgender + regime + region +  age + logged.income + educ2cat + xspanish +
                       marit2cat + nativity + born.missing,
                  data = MR,
                 family = binomial)

get.or.se <- function(model) {
    broom::tidy(model) %>% 
    mutate(or = exp(estimate),
    var.diag = diag(vcov(model)),
    or.se = sqrt(or^2 * var.diag)) %>%
    select(or.se) %>% unlist %>% unname
}

stargazer(aware_fit, aware_fit_regime, id_fit, id_fit_regime,
          title = "Table A3: Odds of Multiracial Ancestry Awareness and Self-Identification",
          header = F, digits = 3, intercept.bottom = T, t.auto = F, p.auto = F, no.space = T,
          apply.coef = exp,
          se = list(get.or.se(aware_fit),
                    get.or.se(aware_fit_regime),
                    get.or.se(id_fit),
                    get.or.se(id_fit_regime)),
          dep.var.labels = c("Aware of MR Ancestry", "Self-Identifies with Multiple Races"),
          column.labels = c("Gender + Controls", "+ Regime", 
                            "Gender + Controls", "+ Regime"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(c("Log Likelihood", round(logLik(aware_fit),0),
                                                    round(logLik(aware_fit_regime),0),
                                                    round(logLik(id_fit),0),
                                                     round(logLik(id_fit_regime), 0)),
                           c("Akaike Inf. Crit.", round(AIC(aware_fit),0),
                                                    round(AIC(aware_fit_regime),0),
                                                    round(AIC(id_fit),0),
                                                     round(AIC(id_fit_regime), 0)),
                           c("Bayesian Inf. Crit.", round(BIC(aware_fit),0),
                                                    round(BIC(aware_fit_regime),0),
                                                    round(BIC(id_fit),0),
                                                     round(BIC(id_fit_regime), 0))),
          covariate.labels = c("Female", 
                               "Black", "Hispanic", "Asian", "Indigenous",
                               "Northeast", "Midwest", "West", "Age",
                               "Logged Income", "BA or higher", "Spanish Version",
                               "Married", "Foreign Born", "Nativity Info Missing",
                               "Intercept"),
          keep.stat = c("n"))

```
*Note*: The Pew data only includes place of birth for a select subsample of respondents. We created an indicator variable that flagged all the missing responses to this question.

\newpage

```{r, echo = F, message = F, results = 'asis'}
# Table 5: Self-ID: linearly add generation, then, gender*generation, then 3-way interaction
id_gen_fit <- glm(MRid ~ gens2cat + ppgender + regime + 
                    age + region + educ2cat + logged.income + xspanish + 
                    marit2cat + nativity + born.missing,
                  data = MR,
                  family = binomial)

id_gender_gen_fit <- glm(MRid ~ ppgender*gens2cat + regime + 
                           age + region + educ2cat + logged.income + xspanish + 
                           marit2cat + nativity + born.missing, 
                         data = MR,
                         family = binomial)

three_way_fit <- glm(MRid ~ ppgender*gens2cat*regime + 
                       age + region + educ2cat + logged.income + xspanish + 
                       marit2cat + nativity + born.missing,
                     data = MR,
                     family = binomial)

get.or.se <- function(model) {
    broom::tidy(model) %>% 
    mutate(or = exp(estimate),
    var.diag = diag(vcov(model)),
    or.se = sqrt(or^2 * var.diag)) %>%
    select(or.se) %>% unlist %>% unname
}

stargazer(id_gen_fit, id_gender_gen_fit, three_way_fit,
          title = "Table A4: Odds of Multiracial Self-Identification (Controlling for Nativity and Marital Status)",
          header = F, digits = 3, intercept.bottom = T, t.auto = F, p.auto = F, no.space = T,
          apply.coef = exp,
          se = list(get.or.se(id_gen_fit),
                    get.or.se(id_gender_gen_fit),
                    get.or.se(three_way_fit)),
          dep.var.labels = c("Self-Identifies with Multiple Races"),
          column.labels = c("+ Generation", "Gender x Generation", "Gender x Generation x Regime"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(c("Controls Included", rep("Yes", 3)),
                           c("Log Likelihood", round(logLik(id_gen_fit),0),
                                                    round(logLik(id_gender_gen_fit),0),
                                                    round(logLik(three_way_fit),0)),
                           c("Akaike Inf. Crit.", round(AIC(id_gen_fit),0),
                                                    round(AIC(id_gender_gen_fit),0),
                                                    round(AIC(three_way_fit),0)),
                           c("Bayesian Inf. Crit.", round(BIC(id_gen_fit),0),
                                                    round(BIC(id_gender_gen_fit),0),
                                                    round(BIC(three_way_fit),0))),
          keep.stat = c("n"),
          omit = c("age", "region", "educ2cat", "logged.income", "xspanish", 
                   "marit2cat", "nativity", "born.missing"),
          order = c(2, 1, 3:6, 17:29, 7:16, 30),
          covariate.labels = c("Female", "Second+ Generation",  
                               "Black", "Hispanic", "Asian", "Indigenous", 
                               "Second+ Generation x Female", 
                               "Female x Black", "Female x Hispanic", 
                               "Female x Asian", "Female x Indigenous",
                               "Second+ Gen x Black", "Second+ Gen x Hispanic",
                               "Second+ Gen x Asian", "Second+ Gen x Indigenous", 
                               "Female x Second+ Gen x Black",
                               "Female x Second+ Gen x Hispanic", 
                               "Female x Second+ Gen x Asian",
                               "Female x Second+ Gen x Indigenous",
                               "Intercept"))
```


\newpage

## Section 4. Alternative Hispanic Coding

About 63\% of respondents who reported Hispanic ancestry explicitly stated that being Hispanic was part of their racial background. In this alternative coding, these respondents are the only ones we include as having Hispanic race or origin.
  
However, it should be noted that almost half (~45\%) of the other 37\% (Hispanic-ancestry respondents who either said being Hispanic is only part of their ethnic background or didn't know) did not report any other races in their family trees. This suggests that many Hispanic respondents who say they don't see Hispanic as a racial category are reporting it as one anyway.

```{r, echo = F, message = F, warning = F}
# Create new hispanic_race dummy var and set = 1 if R says being hispanic is part of their racial background 
data <- data %>% mutate(hispanic_race = case_when(is.na(QR6) ~ NA_real_,
                                                  QR6 == 1 ~ 1,
                                                  QR6 == 3 ~ 1,
                                                  TRUE ~ 0)) 

# Percentage of Hispanic Rs who see being Hispanic as being part of their racial background:
#prop.table(table(data$hispanic_race))*100

# Percentage of Hispanic Rs who DO NOT see being Hispanic as part of their racial background but did not report any other races in ancestry:
#data %>% 
#  group_by(hispanic_race, total_ancestry_races) %>% 
#  summarise(Freq = n()) %>%
#  group_by(hispanic_race) %>%
#  mutate(percent = 100*Freq/sum(Freq)) %>% 
#  filter(hispanic_race == 0)

# recode any_latino to only include hispanic_race == 1
data$any_latino2 <- ifelse(data$any_latino == 1 & data$hispanic_race == 1, 1, 0)

# Recode Multiracial Ancestry reporting with new any_latino2 variable
#### people are MR Aware if they report more than 1 race in ancestry
data <- data %>% 
  mutate(total_ancestry_races2 = any_white + any_black + any_latino2 + any_asian + 
                              any_indian +  any_nhopi + any_other) %>%
  mutate(reportsMR2 = ifelse(total_ancestry_races2 >= 2, 1, 0)) 

# plus those who said "yes" to other ancestry despite not specifying (QR5 == 1)
# (aka they reported 1 race in ancestry but said earlier ancestors were another race)
data$reportsMR2 <- ifelse(data$QR5 == 1, 1, data$reportsMR2)

# remove ancestry_incomplete cases from reportsMR coding
data$reportsMR2 <- ifelse(data$ancestry_incomplete == 1, NA, data$reportsMR2)

# Identity Outcome: Selects 2+ Races - 1 if respondent checks 2+ boxes for self-reported race, 0 if not ###
data$self_hispanic_race <- ifelse(data$QR1b == 1 & data$hispanic_race == 1, 1, 0)
data$alt.races2 <- data$QR1a + data$self_hispanic_race + data$QR1c + data$QR1d + data$QR1e + data$QR1f + data$QR1g  
# if 2 or more, self-id as MR
data$MRid2 <- ifelse(data$alt.races2 >= 2, 1, 0) 

#### Account for write-in responses
# cases that should be recoded back to monoracial 
monoracial_cases_alt <- unname(unlist(data %>% filter(alt.races2 == 2 & QR1g == 1) %>% 
  mutate(races = nchar(race_R)) %>%
  filter(races == 1) %>%
  select(CaseID)))
data$MRid2 <- ifelse(data$CaseID %in% monoracial_cases_alt, 0, data$MRid2)

# cases that only checked "other" but indicate multiraciality 
multiracial_other_cases_alt <- unname(unlist(data %>% filter(alt.races2 < 2 & QR1g == 1) %>% 
  mutate(races = nchar(race_R)) %>%
  filter(races >= 1) %>%
  select(CaseID))) 
data$MRid2 <- ifelse(data$CaseID %in% multiracial_other_cases_alt, 1, data$MRid2)

#### Relabel levels
data$MRid2 <- factor(data$MRid2, levels = c(0,1), labels = c("Selects 1 race", "Selects 2+ races"))

# recode generation with new total_ancestry_races2 variable
data <- data %>%
########## identify monoracial (0 generations)
# iif only one race reported in ancestry AND does NOT answer "yes" to QR5 
# (same results as matching string of race_mom, race_dad, etc.)
  mutate(gen0_2 = ifelse(total_ancestry_races2 < 2 & QR5 != 1, 1, 0),
########## identify 1st gen
# iif both parents are single race and those races are different PLUS
# no races other than these two races are reported for gparents and QR5 != 1
         gen1_2 = ifelse(new_race_mom != new_race_dad &
                         nchar(new_race_mom) == 1 &
                         nchar(new_race_dad) == 1 &
                         QR5 != 1, 1, 0))
# but these include cases where grandparents have a value that aren't in parents 
grand_notinpar_cases2 <- data %>%
  filter(gen1_2 == 1) %>%
  # put space between grandparent string
  mutate(race_grand_space = trimws(gsub('(.{1})', '\\1 ',new_race_grand))) %>%
  # separate rows and match, flag == 1 if there is a race in grandparents that's not in mom or dad
  separate_rows(race_grand_space, sep = " ") %>%
  mutate(grand_new_flag = ifelse(race_grand_space == new_race_mom | race_grand_space == new_race_dad,
                              0, 1)) %>%
  # save CaseIDs of cases with grandparent new races
  group_by(CaseID) %>%
  summarise(grand_new_races = sum(grand_new_flag)) %>%
  filter(grand_new_races >= 1) %>%
  select(CaseID)
# recode these cases as NOT first gen
data$gen1_2 <- ifelse(data$CaseID %in% unlist(grand_notinpar_cases2), 0, data$gen1_2)
  
# create generation variable - everyone who is not gen 0 or gen 1 is 2
data$gens2cat2 <- ifelse(data$gen0_2 == 1, 0, 
                        ifelse(data$gen1_2 == 1, 1, 2))
# unless you have incomplete ancestry info, then you are missing
data$gens2cat2 <- ifelse(data$ancestry_incomplete, NA, data$gens2cat2)

# regimes with new any_latino2 var
data$regime2 <- case_when(data$any_black == 1 ~ "Black",
                         data$any_black == 0 & data$any_latino2 == 1 ~ "Hispanic",
                         data$any_black == 0 & data$any_latino2 == 0 & 
                           data$any_asian == 1 ~ "Asian",
                         data$any_black == 0 & data$any_latino2 == 0 &
                           data$any_asian == 0 & data$any_indian == 1 ~ "Indigenous",
                         data$any_black == 0 & data$any_latino2 == 0 &
                           data$any_asian == 0 & data$any_nhopi == 1 ~ "Indigenous",
                         data$any_black == 0 & data$any_latino2 == 0 &
                           data$any_asian == 0 & data$any_indian == 0 & data$any_nhopi == 0 ~ "Other")

## convert to factor and make "Other" the reference category
data$regime2 <- factor(data$regime2) %>%
                  fct_relevel(., "Other", "Black", "Hispanic", "Asian", "Indigenous")

######### create alternative MR reporting only dataset
MR_alt <- filter(data, reportsMR2 == 1)

# Combine MR awareness and identification to one variable
data$AwareID2 = as.factor(ifelse(data$reportsMR2 == 0, "Monoracial Ancestry",
                          ifelse(data$reportsMR2 == 1 & data$MRid2 == "Selects 1 race", 
                                 "Multiracial Ancestry; Monoracial Self-ID", 
                                 "Multiracial Ancestry; Multiracial Self-ID"))) 
```

```{r, echo = F, message = F, warning = F, results = 'asis'}
# Table 3: ID and Awareness by Regime
MRblacks_alt <- filter(MR_alt, regime2 == "Black")
MRlatinos_alt <- filter(MR_alt, regime2 == "Hispanic")
MRasians_alt <- filter(MR_alt, regime2 == "Asian")
MRindig_alt <- filter(MR_alt, regime2 == "Indigenous")

# N
regime_N <- data %>% group_by(regime2) %>% summarise(N = n())

# awareness prevalence
aware_regime_table <- data %>% 
  group_by(regime2, reportsMR2) %>%
  summarise(Freq = n()) %>%
  group_by(regime2) %>%
  mutate(TotalAware = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), "%", sep = "")) %>%
  filter(reportsMR2 == 1) %>% 
  select(-c(reportsMR2, Freq))

#join
regime_outcomes <- left_join(regime_N, aware_regime_table, by = "regime2") %>%
  filter(regime2 != "Other")

# regime-specific ID: as MR, as race of regime, as another race
# blacks
mono_blacks <- unlist(
  MRblacks_alt %>%
    mutate(MonoID = case_when(MRid2 == "Selects 2+ races" ~ "MR",
                              MRid2 == "Selects 1 race" & QR1c == 1 ~ "RegimeRace", 
                              TRUE ~ "Other")) %>%
    group_by(MonoID) %>%
    dplyr::summarise(Freq = n()) %>%
    mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq)) %>%
    spread(MonoID, Percentage))

# hispanics
mono_latinos <- unlist(
 MRlatinos_alt %>% 
    mutate(MonoID = case_when(MRid2 == "Selects 2+ races" ~ "MR",
                              MRid2 == "Selects 1 race" &  self_hispanic_race == 1 ~ "RegimeRace", 
                              TRUE ~ "Other")) %>%
    group_by(MonoID) %>%
    dplyr::summarise(Freq = n()) %>%
    mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
    select(-c(Freq)) %>%
    spread(MonoID, Percentage))

# asians
mono_asians <- unlist(
  MRasians_alt %>% 
  mutate(MonoID = case_when(MRid2 == "Selects 2+ races" ~ "MR",
                            MRid2 == "Selects 1 race" & QR1d == 1 ~ "RegimeRace", 
                            TRUE ~ "Other")) %>%
  group_by(MonoID) %>%
  summarise(Freq = n()) %>%
  mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
  select(-c(Freq)) %>%
  spread(MonoID, Percentage))

# indigenous
mono_indig <- unlist(
  MRindig_alt %>% 
  mutate(MonoID = case_when(MRid2 == "Selects 2+ races" ~ "MR",
                            MRid2 == "Selects 1 race" & QR1e == 1 ~ "RegimeRace", 
                            MRid2 == "Selects 1 race" & QR1f == 1 ~ "RegimeRace",
                            TRUE ~ "Other")) %>% 
  group_by(MonoID) %>%
  summarise(Freq = n()) %>%
  mutate(Percentage = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), 
                            "%", sep = "")) %>%
  select(-c(Freq)) %>%
  spread(MonoID, Percentage))

#merge all together
mono_ID <- data.frame(rbind(mono_blacks, 
                            mono_latinos,
                            mono_asians, 
                            mono_indig))
regime_outcomes$MR_N <- c(nrow(MRblacks_alt), 
                          nrow(MRlatinos_alt),
                          nrow(MRasians_alt),
                          nrow(MRindig_alt))
regime_outcomes$MR <- mono_ID$MR
regime_outcomes$RegimeRace <- mono_ID$RegimeRace
regime_outcomes$Other <- mono_ID$Other
colnames(regime_outcomes) <- c("Regime",
                               "N",
                               "Aware of MR Ancestry",
                               "N",
                               "2+ Races",
                               "Regime Race",
                               "Another Race")

#print
kable(regime_outcomes, "latex", booktabs = T, 
      caption = "Table A5: Multiracial Awareness and Self-Identification by Racial Regime (Hispanic as racial background only)") %>%
  kable_styling(position = "center", 
                latex_options = c("striped", "HOLD_position")) %>% 
  add_header_above(c(" " = 1, 
                     "In Full Sample \n(N = 22,616)" = 2, 
                     "Self-Identification among those with MR Ancestry \n(N = 4,007)" = 4),
                   bold = T) %>%
  column_spec(1, width = "2.5 cm") %>%
  column_spec(2, width = "1.2 cm") %>%
  column_spec(4, width = "1.2 cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:",
           footnote_as_chunk = T) 
```

\newpage 

```{r, echo = F, message = F, warning = F, fig.show ='hold',fig.align='center', fig.height = 7, fig.width = 7}
# Figure 2: % Self-id as proportion of aware (total and by generation)
# table - total
id_table <- MR_alt %>%
  group_by(MRid2, ppgender) %>%
  summarise(Freq = n()) %>%
  rename(Gender = ppgender) %>%
  group_by(Gender) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)),
                              "%", sep = ""),
         Generation = c("All Generations")) %>%
  select(Gender, Generation, MRid2, Freq, PercentLabel)
id_table$Gender <- relevel(id_table$Gender, ref = "Female")

# table - by generation
id_gen_table <- MR_alt %>%
  mutate(gens2cat2 = factor(gens2cat2, labels = c("First Generation", "Second Generation or Higher"))) %>%
  group_by(ppgender, gens2cat2, MRid2) %>%
  summarise(Freq = n()) %>%
  rename(Gender = ppgender, Generation = gens2cat2) %>%
  group_by(Gender, Generation) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), "%", sep = ""))
id_gen_table$Gender <- relevel(id_gen_table$Gender, ref = "Female")

# combine tables
id_all_table <- bind_rows(id_table, id_gen_table)
id_all_table$Generation <- factor(id_all_table$Generation,
                                     levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

#set colors
colors2 <- c(brewer.pal(3, "Blues")[3], brewer.pal(3, "Blues")[2])

################################################ Statistical significance and labels
gen1_alt <- filter(MR_alt, gens2cat2 == 1)
gen2_alt <- filter(MR_alt, gens2cat2 == 2)
#chisq.test(MR_alt$ppgender, MR_alt$MRid2) #p = 0.04126
#chisq.test(gen1_alt$ppgender, gen1_alt$MRid2) #p = 0.03448
#chisq.test(gen2_alt$ppgender, gen2_alt$MRid2) #p = 0.002534

#labels 
label1.df <- data.frame(Gender = "Male" ,
                        Freq = 2500,
                        MRid2 = "Selects 1 race",
                        Generation = "All Generations",
                        lab = "*")
label2.df = data.frame(Gender = "Male",
                       Freq = 450,
                       MRid2 = "Selects 1 race",
                       Generation = "First Generation",
                       label = "*")
label3.df = data.frame(Gender = "Male",
                       Freq = 2200,
                       MRid2 = "Selects 1 race",
                       Generation = "Second Generation or Higher",
                       label = "***")

label1.df$Generation <- factor(label1.df$Generation,
                               levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

label2.df$Generation <- factor(label2.df$Generation,
                               levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

label3.df$Generation <- factor(label3.df$Generation,
                               levels = c("All Generations",
                                                "First Generation",
                                                "Second Generation or Higher"))

########################################### PLOT ##################################

ggplot(id_all_table, aes(x = Gender, y = Freq, fill = fct_rev(MRid2))) + 
  geom_bar(stat = "identity", position = "stack") +
  labs(y = "Count", x = "\nGender and Multiracial Generation",
       fill = "") +
  scale_fill_manual(values = colors2) +
  scale_y_continuous(label = comma, 
                     limits = c(0,2600),
                     breaks = c(500, 1000, 1500, 2000, 2500)) +
  theme_minimal() +
  geom_text(aes(x = Gender, y = Freq, label = PercentLabel, 
                group = fct_rev(MRid2)),
        position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(.~Generation, strip.position = "bottom") +
  theme(strip.placement = "outside",
        strip.text.x = element_text(size=8, face = "bold")) +
  geom_text(data = label1.df, label = "*", hjust = 6.6, size = 5) +
  geom_text(data = label2.df, label = "*", hjust = 6.6, size = 5) +
  geom_text(data = label3.df, label = "***", hjust = 2.5, size = 5) +
  theme(axis.text.x = element_text(size = 8, face = "bold", margin = margin(t = -10)),
        axis.text.y = element_text(size = 8),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.title.x = element_text(size = 10, face = "bold"),
        legend.position = c(0.99, 1.02),
        legend.justification = c("right", "top"),
        legend.box.just = c("right", "top"),
        legend.margin = margin(0, 0, 0, 0),
        legend.text=element_text(size=8),
        legend.key.height = unit(0.5, "cm")) +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.x = unit(-0.1, "lines"))

```

\begin{center}
Figure A3: Multiracial self-identification by gender and generation (Hispanic as racial background only).  
  
$^{*}$p<0.05; $^{**}$p<0.01; $^{***}$p<0.001 (Pearson's Chi-Squared Test for statistically significant differences by gender) 
\end{center}

&nbsp;


\newpage

## Section 5. Distinguishing 4th+ gen (same-race parents AND grandparents) from respondents with mixed-race parents or grandparents.

We cannot clearly distinguish second- and third- generation respondents from each other because respondents are asked to report the races/origins of *all* of their grandparents at the same time (rather than each grandparent at a time). This prevents us from identifying second-generation respondents who have at least one multiracial parent/grandparents of different races from third-generation respondents who have at least one multiracial grandparent. 
  
However, we can identify respondents who are fourth-generation or higher if they only reported one race for all parents and grandparents, but said that they had great-grandparents or earlier ancestors who were a different race or origin than themselves, their parents, or their grandparents. 

Making this distinction in our analyses shows that the gender differences in self-identification among higher-generation multiracials is mostly manifested in the second and third generation; multiracial self-identification is about equally rare for 4th+ generation men and 4th+ generation women. 

```{r, echo = F}
########## identify 4th+ gen
# iif the single same race is reported for mom, dad, AND grandparents but yes to QR5
data <- data %>% mutate(gen4 = ifelse(nchar(new_race_mom) == 1 &
                                      nchar(new_race_dad) == 1 &
                                      nchar(new_race_grand) == 1 &
                                      new_race_mom == new_race_dad &
                                      new_race_dad == new_race_grand &
                                      QR5 == 1, 1, 0))

# new gens3cat variable
data$gens3cat = case_when(data$gens2cat == 0 & data$gen4 == 0 ~ 0,
                          data$gens2cat == 1 & data$gen4 == 0 ~ 1,
                          data$gens2cat == 2 & data$gen4 == 0 ~ 2,
                          data$gens2cat == 2 & data$gen4 == 1 ~ 4)


# bring into MR dataset
MR_gens3cat <- filter(data, reportsMR == 1)
```

```{r, echo = F, message = F, warning = F, results = 'asis'}
### TABLES ###
# Table 1: descriptive table
#prop.table(table(data$gens3cat))*100
#prop.table(table(MR_gens3cat$gens3cat))*100

Variable <- c("Monoracial", "Multiracial: First Generation", "Multiracial: Second & Third Generation","Multiracial: Fourth Generation or higher")

Mean <- c("80.9", "2.7", "8.9", "7.5")

MR_Mean <- c("-", "14.3", "46.7", "39.0")

desp_table <- data.frame(cbind(Variable, Mean, MR_Mean))

colnames(desp_table) <- c("", "%", "%")

kableExtra::kable(desp_table, "latex", booktabs = T, caption = "Table A6: Generational Composition") %>%
  kable_styling(position = "center", 
                latex_options = c("HOLD_position")) %>% 
  add_header_above(c(" ", 
                     "Full Sample \n(N = 22,616)" = 1, 
                     "Multiracial Ancestry \n(N = 4,330)" = 1), bold = T) %>%
  pack_rows("Self-Reported Ancestry", 1, 4) %>%
  column_spec(1, width = "8.5cm") %>%
  footnote(general = "Pew Research Center’s 2015 Survey of Multiracial Adults",
           general_title = "Source:") 
```

\newpage

```{r, echo = F, message = F, warning = F, fig.show ='hold',fig.align='center', fig.height = 9, fig.width = 6}
# Figure 2: % Self-id as proportion of aware (total and by generation)
# table - total
id_table <- MR_gens3cat %>%
  group_by(MRid, ppgender) %>%
  summarise(Freq = n()) %>%
  rename(Gender = ppgender) %>%
  group_by(Gender) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)),
                              "%", sep = ""),
         Generation = c("All Generations")) %>%
  select(Gender, Generation, MRid, Freq, PercentLabel)
id_table$Gender <- relevel(id_table$Gender, ref = "Female")

# table - by generation
id_gen_table <- MR_gens3cat %>%
  mutate(gens3cat = factor(gens3cat, labels = c("First Generation",
                                                "Second & Third Generations",
                                                "Fourth Generation or Higher"))) %>%
  group_by(ppgender, gens3cat, MRid) %>%
  summarise(Freq = n()) %>%
  rename(Gender = ppgender, Generation = gens3cat) %>%
  group_by(Gender, Generation) %>%
  mutate(PercentLabel = paste(as.character(format(round(100 * Freq/sum(Freq), 1), nsmall = 1)), "%", sep = ""))
id_gen_table$Gender <- relevel(id_gen_table$Gender, ref = "Female")

# combine tables
id_all_table <- bind_rows(id_table, id_gen_table)
id_all_table$Generation <- factor(id_all_table$Generation,
                                     levels = c("All Generations",
                                                "First Generation",
                                                "Second & Third Generations",
                                                "Fourth Generation or Higher"))

#set colors
colors2 <- c(brewer.pal(3, "Blues")[3], brewer.pal(3, "Blues")[2])

################################################ Statistical significance and labels
gen1 <- filter(MR_gens3cat, gens3cat == 1)
gen2 <- filter(MR_gens3cat, gens3cat == 2)
gen4 <- filter(MR_gens3cat, gens3cat == 4)
#chisq.test(MR_gens3cat$ppgender, MR_gens3cat$MRid) #p = 0.01619
#chisq.test(gen1$ppgender, gen1$MRid) #p = 0.05233
#chisq.test(gen2$ppgender, gen2$MRid) #p = 2.997e-08
#chisq.test(gen4$ppgender, gen4$MRid) #p = 1

#labels 
label1.df <- data.frame(Gender = "Male" ,
                        Freq = 2500,
                        MRid = "Selects 1 race",
                        Generation = "All Generations",
                        lab = "*")

label3.df = data.frame(Gender = "Male",
                       Freq = 1800,
                       MRid = "Selects 1 race",
                       Generation = "Second & Third Generations",
                       label = "***")

label1.df$Generation <- factor(label1.df$Generation,
                               levels = c("All Generations",
                                          "First Generation",
                                          "Second & Third Generations",
                                          "Fourth Generation or Higher"))



label3.df$Generation <- factor(label3.df$Generation,
                               levels = c("All Generations",
                                          "First Generation",
                                          "Second & Third Generations",
                                          "Fourth Generation or Higher"))

########################################### PLOT ##################################

ggplot(id_all_table, aes(x = Gender, y = Freq, fill = fct_rev(MRid))) + 
  geom_bar(stat = "identity", position = "stack") +
  labs(y = "Count", x = "\nGender and Multiracial Generation",
       fill = "") +
  scale_fill_manual(values = colors2) +
  scale_y_continuous(label = comma, 
                     limits = c(0,2600),
                     breaks = c(500, 1000, 1500, 2000, 2500)) +
  theme_minimal() +
  geom_text(aes(x = Gender, y = Freq, label = PercentLabel, 
                group = fct_rev(MRid)),
        position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(.~Generation, strip.position = "bottom") +
  theme(strip.placement = "outside",
        strip.text.x = element_text(size=8, face = "bold")) +
  geom_text(data = label1.df, label = "*", hjust = 6.8, size = 5) +
  geom_text(data = label3.df, label = "***", hjust = 2.7, size = 5) +
  theme(axis.text.x = element_text(size = 8, face = "bold", margin = margin(t = -10)),
        axis.text.y = element_text(size = 8),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.title.x = element_text(size = 10, face = "bold"),
        legend.position = c(0.99, 1.02),
        legend.justification = c("right", "top"),
        legend.box.just = c("right", "top"),
        legend.margin = margin(0, 0, 0, 0),
        legend.text=element_text(size=8),
        legend.key.height = unit(0.5, "cm")) +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.x = unit(-0.1, "lines"))

```

\begin{center}
Figure A4: Multiracial self-identification by gender and generation (three generation categories).  
  
$^{*}$p<0.05; $^{**}$p<0.01; $^{***}$p<0.001 (Pearson's Chi-Squared Test for statistically significant differences by gender) 
\end{center}

&nbsp;


